import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import seaborn as sns
import requests
import os
import re
from bs4 import BeautifulSoup
import datetime
import xlwings
import locale
import random
import glob
import plotly.graph_objects as go
import plotly.express as px
from plotly.offline import init_notebook_mode, iplot
from sklearn.model_selection import train_test_split
from sklearn import linear_model, preprocessing, cluster
from sklearn.metrics import r2_score, mean_squared_error
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX
# we request the ranking page
req = requests.get('https://www.vendeeglobe.org/fr/classement')
# we get our soup from the downloaded content (explicit parser avoids a bs4 warning
# and makes the result reproducible across environments)
soup = BeautifulSoup(req.content, 'html.parser')
dates = []
# we extract the arrival dates from the website
for td in soup.find('table', attrs={'class': 'ranking-report'}).findAll('td', attrs={'class': 'row-layout', 'colspan': '2'}):
    dates.append(datetime.datetime.strptime(td.text, 'Date arrivée : %d/%m/%Y %H:%M:%S'))
# we extract the first arrival
first = min(dates)
# we extract the available report dates from the website's dropdown list
for option in soup.find('select', attrs={'class': 'form__input m--select onsubmit-rank'}).findAll('option'):
    date = option.attrs['value']
    # we skip if the date is empty or after the first arrival
    if date != '' and datetime.datetime.strptime(date, '%Y%m%d_%H%M%S') < first:
        file = requests.get('https://www.vendeeglobe.org/download-race-data/vendeeglobe_'+date+'.xlsx')
        # context manager guarantees the file handle is closed
        # (the original `open(...).write(...)` leaked the handle)
        with open('data/'+date+'.xlsx', 'wb') as out:
            out.write(file.content)
def xlsx_broken(file):
    """Open, save and close *file* in Excel via xlwings.

    The race-data workbooks are served in a slightly malformed format that
    pandas cannot read; round-tripping them through Excel repairs them.

    Returns True on success.  The Excel application is quit even if opening
    or saving raises, so no hidden Excel process is leaked.
    """
    excel_app = xlwings.App(visible=False)
    try:
        excel_book = excel_app.books.open(file)
        excel_book.save()
        excel_book.close()
    finally:
        # always quit, otherwise a failed open/save leaves Excel running
        excel_app.quit()
    return True
# round-trip every downloaded workbook through Excel so pandas can read it
for path in glob.glob("data/*"):
    xlsx_broken(path)
# list the report files, skipping hidden files and the first and last entries
files = [f for f in os.listdir('data/') if not f.startswith('.')][1:-1]
# we load a file
def _dms_to_decimal(coord, positive_char):
    """Convert a "DD°MM.SS'X" coordinate string to decimal degrees.

    Decimal degrees = Degrees + Minutes/60 + Seconds/3600.
    The sign is positive when the trailing letter equals *positive_char*
    ('N' for latitudes, 'E' for longitudes) and negative otherwise (S / W).
    """
    degrees, rest = coord.split('°')
    minutes, seconds = rest.split('.')
    sign = 1 if seconds[-1] == positive_char else -1
    return sign * (int(degrees)
                   + int(minutes) / 60
                   + int(re.sub('[^0-9]', '', seconds)) / 3600)

def load_data(filename):
    """Load one race report from data/<filename> into a tidy dataframe.

    Returns a dataframe indexed by the report datetime ('Hour FR') with the
    rank, boat identity, decimal coordinates and numeric speed/VMG/distance
    columns.
    """
    df = pd.read_excel('data/'+filename, usecols='B:U', skiprows=[0,1,2,3],
            names=['Rank', 'Nat. Sail', 'Skipper/crew', 'Hour FR', 'Latitude', 'Longitude',
                   'Heading ° 30min', 'Speed kts 30min', 'VMG kts 30min', 'Distance nm 30min',
                   'Heading ° last report', 'Speed kts last report', 'VMG kts last report','Distance nm last report',
                   'Heading ° 24h', 'Speed kts 24h', 'VMG kts 24h', 'Distance nm 24h', 'DTF', 'DTL'])
    # we drop the na values (the sheet's header/footer rows)
    df = df.dropna()
    # we split the combined string columns into separate ones
    # (each split is computed once instead of once per derived column)
    nat_sail = df['Nat. Sail'].str.split(' ')
    df['Nationality'] = nat_sail.str[0].str.replace('\n', '')
    df['Sail'] = nat_sail.str[1].apply(int)
    df = df.drop('Nat. Sail', axis=1)
    skipper_crew = df['Skipper/crew'].str.split('\n')
    df['Skipper'] = skipper_crew.str[0]
    df['Crew'] = skipper_crew.str[1].str.title()
    df = df.drop('Skipper/crew', axis=1)
    # we convert the latitude and longitude into decimal degrees
    df['Latitude'] = df['Latitude'].apply(lambda s: _dms_to_decimal(s, 'N'))
    df['Longitude'] = df['Longitude'].apply(lambda s: _dms_to_decimal(s, 'E'))
    # we convert the time into a proper datetime value (the day comes from the filename)
    date = re.search('([0-9]{8})', filename).group(0)
    df['Hour FR'] = df['Hour FR'].apply(lambda x: datetime.datetime.strptime(date+'_'+x[:5], '%Y%m%d_%H:%M'))
    # we set it as the index
    df = df.set_index('Hour FR')
    # we convert all the numerical values into ints/floats
    df['Rank'] = df['Rank'].apply(int)
    numerical_cols = ['Heading ° 30min', 'Speed kts 30min', 'VMG kts 30min', 'Distance nm 30min',
                      'Heading ° last report', 'Speed kts last report', 'VMG kts last report','Distance nm last report',
                      'Heading ° 24h', 'Speed kts 24h', 'VMG kts 24h', 'Distance nm 24h', 'DTF', 'DTL']
    # strip the unit suffixes (kts, nm, °) before the numeric conversion;
    # regex=True is required: str.replace defaults to literal matching on pandas >= 2.0
    df[numerical_cols] = df[numerical_cols].apply(lambda x: x.str.replace('[a-z]+|°', '', regex=True)).apply(pd.to_numeric)
    # we reorder the columns
    df = df[['Rank', 'Nationality', 'Sail', 'Skipper', 'Crew', 'Latitude', 'Longitude', *numerical_cols]]
    return df
# For example let's read one file (index 1, i.e. the second entry of `files`)
df = load_data(files[1])
df.head()
| Rank | Nationality | Sail | Skipper | Crew | Latitude | Longitude | Heading ° 30min | Speed kts 30min | VMG kts 30min | ... | Heading ° last report | Speed kts last report | VMG kts last report | Distance nm last report | Heading ° 24h | Speed kts 24h | VMG kts 24h | Distance nm 24h | DTF | DTL | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Hour FR | |||||||||||||||||||||
| 2020-11-08 17:00:00 | 1 | FRA | 8 | Jérémie Beyou | Charal | 46.353333 | -2.597222 | 268 | 26.0 | 22.6 | ... | 262 | 21.1 | 19.7 | 31.7 | 254 | 1.4 | 1.4 | 34.5 | 24265.1 | 0.0 |
| 2020-11-08 17:00:00 | 2 | FRA | 79 | Charlie Dalin | Apivia | 46.296111 | -2.529167 | 262 | 24.8 | 22.8 | ... | 357 | 0.0 | 0.0 | 2782.5 | 247 | 1.4 | 1.4 | 32.9 | 24265.8 | 0.7 |
| 2020-11-08 17:00:00 | 3 | GBR | 99 | Alex Thomson | Hugo Boss | 46.318889 | -2.535000 | 263 | 22.8 | 20.8 | ... | 257 | 19.9 | 19.1 | 29.9 | 249 | 1.4 | 1.3 | 32.7 | 24266.3 | 1.2 |
| 2020-11-08 17:00:00 | 4 | FRA | 59 | Thomas Ruyant | Linkedout | 46.329167 | -2.541389 | 266 | 24.3 | 21.6 | ... | 260 | 19.9 | 18.9 | 29.9 | 251 | 1.4 | 1.3 | 32.7 | 24266.3 | 1.2 |
| 2020-11-08 17:00:00 | 5 | MON | 10 | Boris Herrmann | Seaexplorer - Yacht Club De Monaco | 46.306111 | -2.500833 | 263 | 23.9 | 21.9 | ... | 257 | 19.0 | 18.2 | 28.2 | 248 | 1.3 | 1.3 | 31.5 | 24267.3 | 2.2 |
5 rows × 21 columns
# load every report and stack them into a single rankings dataframe
df_list = [load_data(f) for f in files]
df_all_rankings = pd.concat(df_list)
df_all_rankings.head()
| Rank | Nationality | Sail | Skipper | Crew | Latitude | Longitude | Heading ° 30min | Speed kts 30min | VMG kts 30min | ... | Heading ° last report | Speed kts last report | VMG kts last report | Distance nm last report | Heading ° 24h | Speed kts 24h | VMG kts 24h | Distance nm 24h | DTF | DTL | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Hour FR | |||||||||||||||||||||
| 2020-11-08 15:30:00 | 1 | FRA | 18 | Louis Burton | Bureau Vallée 2 | 46.412778 | -1.846667 | 241 | 17.7 | 17.5 | ... | 357 | 0.0 | 0.0 | 2788.0 | 201 | 0.3 | 0.3 | 6.1 | 24293.9 | 0.0 |
| 2020-11-08 15:31:00 | 2 | MON | 10 | Boris Herrmann | Seaexplorer - Yacht Club De Monaco | 46.409444 | -1.839444 | 241 | 11.1 | 10.9 | ... | 357 | 0.0 | 0.0 | 2787.9 | 196 | 0.3 | 0.2 | 6.0 | 24294.2 | 0.4 |
| 2020-11-08 15:30:00 | 3 | FRA | 8 | Jérémie Beyou | Charal | 46.425278 | -1.844167 | 244 | 15.5 | 15.5 | ... | 357 | 0.0 | 0.0 | 2788.5 | 199 | 0.2 | 0.2 | 5.5 | 24294.3 | 0.5 |
| 2020-11-08 15:30:00 | 4 | FRA | 59 | Thomas Ruyant | Linkedout | 46.419722 | -1.835556 | 244 | 13.2 | 13.1 | ... | 357 | 0.0 | 0.0 | 2788.3 | 196 | 0.2 | 0.2 | 5.6 | 24294.5 | 0.6 |
| 2020-11-08 15:30:00 | 5 | FRA | 53 | Maxime Sorel | V And B Mayenne | 46.416389 | -1.832222 | 246 | 10.9 | 10.9 | ... | 357 | 0.0 | 0.0 | 2788.1 | 195 | 0.8 | 0.7 | 5.8 | 24294.5 | 0.6 |
5 rows × 21 columns
# we request the glossary page (a browser-like User-Agent avoids being blocked)
headers = {'User-Agent': 'Mozilla/5.0'}
req_skip = requests.get('https://www.vendeeglobe.org/fr/glossaire', headers=headers)
# we raise an Exception if the status code is not 200
if req_skip.status_code != 200:
    # fixed garbled message ("Couldn't get open URL")
    raise Exception('Couldn\'t open URL')
# we get our soup from the downloaded content (explicit parser avoids a bs4 warning)
soup_skip = BeautifulSoup(req_skip.content, 'html.parser')
# we extract each skipper's info popup from the website
df_list = []
for div in soup_skip.findAll('div', attrs={'class': 'boats-list__popup-infos'}):
    # the skipper name is the popup title
    skip_name = div.find('h3', attrs={'class': 'boats-list__popup-title'}).text.title()
    skippers_info = {'Equipage': skip_name}
    # the rest of the data is a list of "label : value" items
    for li in div.find('ul', attrs={'class': 'boats-list__popup-specs-list'}).findAll('li'):
        info = li.text.split(' : ')
        skippers_info[info[0].strip()] = [info[1].strip()]
    # we append the data as one-row dataframes
    df_list.append(pd.DataFrame(skippers_info))
# we generate a dataframe containing all of the extracted data
df_skippers = pd.concat(df_list, ignore_index=True)
df_skippers.head()
# we replace the former names of the boat by a proper list of strings
df_skippers['Anciens noms du bateau'] = df_skippers['Anciens noms du bateau'].fillna('').str.strip().str.split(',')
# we convert all numerical values into floats or ints
numerical_cols = ['Longueur', 'Largeur', 'Tirant d\'eau', 'Déplacement (poids)','Hauteur mât','Surface de voiles au près','Surface de voiles au portant']
# strip the units (m, m2/m², t...) and the 'nc' (not communicated) markers,
# and use '.' as the decimal separator; regex=True is required because
# str.replace defaults to literal matching on pandas >= 2.0
df_skippers[numerical_cols] = df_skippers[numerical_cols].apply(
    lambda x: x.str.replace('m2|m²|m|t[a-z]*|nc|NC', '', regex=True).str.replace(',', '.')).apply(pd.to_numeric)
# we rename the columns to include the units
df_skippers = df_skippers.rename(columns={'Longueur': 'Longueur (m)', 'Largeur': 'Largeur (m)', 'Tirant d\'eau': 'Tirant d\'eau (m)', 'Déplacement (poids)': 'Déplacement (poids) (t)','Hauteur mât': 'Hauteur mât (m)','Surface de voiles au près': 'Surface de voiles au près (m2)', 'Surface de voiles au portant': 'Surface de voiles au portant (m2)'})
# we correct the 'foiler' vs 'foils' categories
df_skippers['Nombre de dérives'] = df_skippers['Nombre de dérives'].str.replace('foiler', 'foils')
# we convert all dates into datetimes; the French locale name differs by
# platform ('fr' on Windows, 'fr_FR.UTF-8'/'fr_FR' on Linux/macOS), so try
# each candidate until one is accepted
for loc in ('fr_FR.UTF-8', 'fr_FR', 'fr'):
    try:
        locale.setlocale(locale.LC_ALL, loc)
        break
    except locale.Error:
        continue
df_skippers['Date de lancement'] = df_skippers['Date de lancement'].apply(lambda x: datetime.datetime.strptime(x, '%d %B %Y'))
df_skippers.head()
# cache both dataframes to disk so the scraping cells need not be re-run,
# then reload them from the pickles
df_all_rankings.to_pickle('df_all_rankings.pkl')
df_skippers.to_pickle('df_skippers.pkl')
df_all_rankings = pd.read_pickle('df_all_rankings.pkl')
df_skippers = pd.read_pickle('df_skippers.pkl')
# we join the rankings with the boat specifications on the crew/boat name,
# keeping the report datetime as the index
df_ranking_skippers = (
    df_all_rankings
    .reset_index()
    .merge(df_skippers, left_on='Crew', right_on='Equipage', how='inner')
    .set_index('Hour FR')
)
df_ranking_skippers.sample(5)
| Rank | Nationality | Sail | Skipper | Crew | Latitude | Longitude | Heading ° 30min | Speed kts 30min | VMG kts 30min | ... | Date de lancement | Longueur (m) | Largeur (m) | Tirant d'eau (m) | Déplacement (poids) (t) | Nombre de dérives | Hauteur mât (m) | Voile quille | Surface de voiles au près (m2) | Surface de voiles au portant (m2) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Hour FR | |||||||||||||||||||||
| 2021-01-23 11:30:00 | 7 | ITA | 34 | Giancarlo Pedote | Prysmian Group | 31.225278 | -32.255278 | 44 | 8.3 | 8.3 | ... | 2015-04-02 | 18.28 | 5.80 | 4.5 | 8.0 | foils | 29.0 | acier forgé | 300 | 600 |
| 2020-12-09 14:30:00 | 2 | FRA | 59 | Thomas Ruyant | Linkedout | -38.726667 | 81.827500 | 132 | 15.7 | 15.2 | ... | 2019-09-03 | 18.28 | 5.85 | 4.5 | 8.0 | foils | 29.0 | acier forgé | 350 | 560 |
| 2021-01-27 14:30:00 | 21 | FRA | 71 | Manuel Cousin | Groupe Sétin | -24.011667 | -26.806389 | 347 | 8.9 | 7.9 | ... | 2007-02-02 | 18.28 | 5.80 | 4.5 | 9.0 | 2 asymétriques | 28.5 | basculante sur vérin hydraulique | 270 | 560 |
| 2021-01-22 04:30:00 | 17 | GBR | 777 | Pip Hare | Medallia | -26.868611 | -36.178333 | 352 | 4.2 | 4.0 | ... | 1999-07-03 | 18.28 | 5.70 | 4.5 | 9.0 | 2 | 29.0 | carbone | 300 | 580 |
| 2021-02-25 11:30:00 | 25 | FIN | 222 | Ari Huusela | Stark | 34.818056 | -33.468333 | 40 | 12.1 | 11.7 | ... | 2007-08-06 | 18.28 | 5.80 | 4.5 | 8.5 | 2 | 28.0 | acier | 270 | 580 |
5 rows × 36 columns
# dimensions of the combined rankings dataframe
df_all_rankings.shape
(15272, 21)
# crews, boat name/number and nationality with their mean distance sailed per day (ascending)
print("les équipages, le nom et numéro de leur bateau, leur nationalité ainsi que la distance moyenne parcourue par jour")
df_all_rankings.groupby(['Sail', 'Nationality', 'Skipper', 'Crew'])['Distance nm 24h'].mean().sort_values(ascending=True)
les équipages, le nom et numéro de leur bateau, leur nationalité ainsi que la distance moyenne parcourue par jour
Sail Nationality Skipper Crew 69 FRFRA Sébastien Destremau Merci 212.909353 222 FIN Ari Huusela Stark 234.952211 56 FRA Fabrice Amedeo Newrest - Art Et Fenetres 236.703500 72 FRA Alexia Barrier Tse - 4Myplanet 238.259403 50 FRA Miranda Merron Campagne De France 261.288670 71 FRA Manuel Cousin Groupe Sétin 262.840675 83 FRA Clément Giraud Compagnie Du Lit - Jiliti 264.040067 6 FRA Nicolas Troussel Corum L'Épargne 269.396000 33 ESP Didac Costa One Planet One Ocean 277.675171 777 GBR Pip Hare Medallia 279.559582 7 SUI Alan Roura La Fabrique 284.864510 92 FRA Stéphane Le Diraison Time For Oceans 286.359860 14 FRA Arnaud Boissieres La Mie Câline - Artisans Artipôle 287.263604 11 JPN Kojiro Shiraishi Dmg Mori Global One 288.328196 49 FRA Romain Attanasio Pure - Best Western Hotels And Resorts 289.994085 30 FRA Clarisse Cremer Banque Populaire X 302.536711 99 GBR Alex Thomson Hugo Boss 304.442949 109 FRA Samantha Davies Initiatives - Coeur 306.887117 8 FRA Jérémie Beyou Charal 309.184045 2 FRFRA Armel Tripon L'Occitane En Provence 312.056496 53 FRA Maxime Sorel V And B Mayenne 313.839919 4 FRA Sébastien Simon Arkea Paprec 314.924684 27 FRA Isabelle Joschke Macsf 318.225266 1 FRA Jean Le Cam Yes We Cam ! 320.729303 1000 FRA Damien Seguin Groupe Apicil 320.936420 9 FRA Benjamin Dutreux Omia - Water Family 322.044196 34 ITA Giancarlo Pedote Prysmian Group 325.390329 85 FRA Kevin Escoffier Prb 327.411679 17 FRA Yannick Bestaven Maître Coq Iv 327.464803 10 MON Boris Herrmann Seaexplorer - Yacht Club De Monaco 327.923093 18 FRA Louis Burton Bureau Vallée 2 330.648447 59 FRA Thomas Ruyant Linkedout 330.988017 79 FRA Charlie Dalin Apivia 331.893971 Name: Distance nm 24h, dtype: float64
# average rank of each crew over the whole race
print("classement moyen de chaque équipage")
df_all_rankings.groupby(['Nationality', 'Skipper', 'Crew'])['Rank'].mean().sort_values()
classement moyen de chaque équipage
Nationality Skipper Crew
FRA Charlie Dalin Apivia 2.286902
Thomas Ruyant Linkedout 3.524793
Yannick Bestaven Maître Coq Iv 4.374741
Jean Le Cam Yes We Cam ! 5.629098
Kevin Escoffier Prb 5.846715
Damien Seguin Groupe Apicil 6.382716
Louis Burton Bureau Vallée 2 6.809524
MON Boris Herrmann Seaexplorer - Yacht Club De Monaco 6.921649
FRA Nicolas Troussel Corum L'Épargne 7.260000
GBR Alex Thomson Hugo Boss 7.461538
FRA Benjamin Dutreux Omia - Water Family 7.572301
Samantha Davies Initiatives - Coeur 10.337423
Sébastien Simon Arkea Paprec 10.398734
ITA Giancarlo Pedote Prysmian Group 10.423868
FRA Maxime Sorel V And B Mayenne 10.659274
Isabelle Joschke Macsf 11.510638
Romain Attanasio Pure - Best Western Hotels And Resorts 13.415896
Clarisse Cremer Banque Populaire X 13.508604
FRFRA Armel Tripon L'Occitane En Provence 16.076772
SUI Alan Roura La Fabrique 16.466783
FRA Arnaud Boissieres La Mie Câline - Artisans Artipôle 17.452297
Stéphane Le Diraison Time For Oceans 17.818499
GBR Pip Hare Medallia 19.221254
ESP Didac Costa One Planet One Ocean 19.964041
FRA Manuel Cousin Groupe Sétin 21.302251
Jérémie Beyou Charal 21.430427
JPN Kojiro Shiraishi Dmg Mori Global One 22.084063
FRA Miranda Merron Campagne De France 23.522167
Clément Giraud Compagnie Du Lit - Jiliti 24.295492
Alexia Barrier Tse - 4Myplanet 24.819403
FIN Ari Huusela Stark 25.955777
FRA Fabrice Amedeo Newrest - Art Et Fenetres 26.965000
FRFRA Sébastien Destremau Merci 27.872902
Name: Rank, dtype: float64
# cumulative distance for 5 random skippers
Skippers = np.random.choice(df_all_rankings['Skipper'].unique(), 5)
# Skippers already holds exactly 5 names, so no extra slicing is needed
for skipper in Skippers:
    skip = df_all_rankings.loc[(df_all_rankings['Skipper'] == skipper), "Distance nm last report"]
    skip.cumsum().plot(label=skipper)
# the distance columns are in nautical miles, not km (fixed unit in the label)
plt.ylabel('Distance totale parcourue (nm)')
plt.legend();
# interactive map of the sampled skippers' tracks
fig = go.Figure(go.Scattermapbox())
for skipper in Skippers:
    sk = df_all_rankings.loc[df_all_rankings['Skipper'] == skipper]
    # the index holds the report timestamps, shown as hover text
    stamp = sk.index
    fig.add_trace(go.Scattermapbox(
        mode="markers+lines",
        lon=sk["Longitude"],
        lat=sk["Latitude"],
        name=skipper,
        marker={'size': 3},
        text=stamp))
fig.update_layout(
    margin={'l': 0, 't': 0, 'b': 0, 'r': 0},
    mapbox={
        # single 'center' key: the original dict defined 'center' twice,
        # so the first value ({'lon': 10, 'lat': 10}) was silently discarded
        'center': {'lon': -20, 'lat': -20},
        'style': "stamen-terrain",
        'zoom': 1})
fig.show()
# we prepare our plot
fig = plt.figure(figsize=(20, 8))
# we pick 4 random skippers
skippers = np.random.choice(df_all_rankings['Skipper'].unique(), 4)
columns_graph = ['Speed kts 24h', 'VMG kts 24h', 'Distance nm 24h']
# one subplot per metric
for k, col in enumerate(columns_graph, start=1):
    ax = fig.add_subplot(1, 3, k)
    # we draw each skipper's time series in the subplot
    for sk in skippers:
        mask = df_all_rankings['Skipper'] == sk
        ax.plot(df_all_rankings[mask].index,
                df_all_rankings.loc[mask, col],
                label=sk, alpha=0.7)
    ax.set_title(col)
    ax.legend()
fig.tight_layout()
# we get the average 24h VMG and the rank for the crew isolated earlier
# NOTE(review): `crew_first_last` is defined in an earlier cell not shown here — confirm it is in scope
VMG_24_APIVIA = df_all_rankings.loc[df_all_rankings['Crew'] == crew_first_last, ('VMG kts 24h', 'Rank')]
# we graph its regression (rank vs VMG) using Seaborn
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.regplot(x='VMG kts 24h', y='Rank', data=VMG_24_APIVIA, marker="+");
# we aggregate some numerical values by type of foil/daggerboard
# ('mean' strings instead of np.mean: passing numpy functions to .agg is
# deprecated in recent pandas and emits a FutureWarning)
df_ranking_skippers.groupby(['Nombre de dérives']).agg({'Crew': lambda x: x.nunique(),
                                                        'Speed kts 24h': 'mean',
                                                        'VMG kts 24h': 'mean',
                                                        'Distance nm 24h': 'mean'})
| Crew | Speed kts 24h | VMG kts 24h | Distance nm 24h | |
|---|---|---|---|---|
| Nombre de dérives | ||||
| 2 | 7 | 11.234977 | 10.182537 | 269.502003 |
| 2 asymétriques | 1 | 10.951286 | 9.707395 | 262.840675 |
| foils | 15 | 12.906236 | 11.636471 | 309.714768 |
# 24h distance per crew, coloured by foil/daggerboard type
sns.barplot(data=df_ranking_skippers, y='Crew', x='Distance nm 24h', hue='Nombre de dérives', orient='h');
# average VMG and speed per skipper
pl = df_ranking_skippers.groupby('Skipper')[['VMG kts 24h', 'Speed kts 24h']].mean()
# each skipper has a single foil type, so .unique() yields 1-element arrays;
# apply(pd.Series) unpacks them into a plain string column
pl['Foils'] = df_ranking_skippers.groupby('Skipper')['Nombre de dérives'].unique().apply(pd.Series)
pl.head()
| VMG kts 24h | Speed kts 24h | Foils | |
|---|---|---|---|
| Skipper | |||
| Alan Roura | 10.641608 | 11.870455 | foils |
| Alex Thomson | 10.966667 | 12.698718 | foils |
| Ari Huusela | 8.673752 | 9.788873 | 2 |
| Armel Tripon | 11.930118 | 13.003150 | foils |
| Arnaud Boissieres | 10.724558 | 11.969788 | foils |
# VMG share by foil configuration
px.pie(pl, values='VMG kts 24h', names='Foils', title='VMG split by foils')
# we prepare our plot
fig = plt.figure(figsize=(20, 8))
# we extract the different types of foils/daggerboards
types = df_ranking_skippers['Nombre de dérives'].unique()
columns_graph = ['Speed kts 24h', 'VMG kts 24h', 'Distance nm 24h']
# one subplot per metric
for k, col in enumerate(columns_graph, start=1):
    ax = fig.add_subplot(1, 3, k)
    # we plot the daily mean of each foil category
    for t in types:
        # resample the single column of interest once: resampling the whole
        # frame with .mean() raises on recent pandas (non-numeric columns)
        # and was computed twice (once just for its index)
        daily = df_ranking_skippers.loc[df_ranking_skippers['Nombre de dérives'] == t, col].resample('D').mean()
        ax.plot(daily.index, daily, label=t, alpha=0.7)
    ax.set_title(col)
    ax.legend()
fig.tight_layout()
# conclusion (printed in French): foilers beat 2-daggerboard boats on mean 24h speed, VMG and distance
print("les voiliers équipés de foils ont des meilleurs résultats (en terme de vitesse, VMG et distance moyennes sur 24h) que ceux équipés de 2 dérives")
les voiliers équipés de foils ont des meilleurs résultats (en terme de vitesse, VMG et distance moyennes sur 24h) que ceux équipés de 2 dérives
# we prepare our plot
fig = plt.figure(figsize=(20, 8))
k = 1
# list of all crews (kept for reference; not used below)
crew = df_all_rankings['Crew'].unique()
columns_graph = ['Distance nm 30min', 'Distance nm 24h', 'Distance nm last report']
# we create an aggregate that maps each crew to its foil type
agg_derive = df_ranking_skippers.pivot_table(values='Nombre de dérives', index='Crew', aggfunc='first')
# we generate the colors map; iterate the column (plain strings) rather than
# .values, which is a 2-D array whose 1-element rows compare elementwise
colors = ['r' if nd == 'foils' else 'g' for nd in agg_derive['Nombre de dérives']]
# we create an aggregate for the graph
agg_crew = df_all_rankings.pivot_table(values=columns_graph, index='Crew', aggfunc='mean')
# we generate custom legend handles for the two colors
custom_legend = [Patch(color='r', label='foils'),
                 Patch(color='g', label='others/unknown')]
# for each column
for col in columns_graph:
    # we add a subplot
    ax = fig.add_subplot(1, 3, k)
    # we plot the column values in each subplot
    ax.barh(y=agg_crew.index, width=agg_crew[col], color=colors)
    ax.set_title(col)
    # we hide the crew labels on the 2nd and 3rd plots (shared with the 1st)
    if k > 1:
        ax.get_yaxis().set_visible(False)
    k += 1
# we attach the custom legend (to the last subplot only)
ax.legend(handles=custom_legend)
fig.tight_layout()
# conclusion (printed in French): again, foil-equipped boats are faster
print("A nouveau, on voit que les voiliers avec foil sont plus performants")
A nouveau, on voit que les voiliers avec foil sont plus performants
! jupyter nbconvert --to html "EL HALABI_KitDataScience_Projet Final.ipynb"
This application is used to convert notebook files (*.ipynb) to various other
formats.
WARNING: THE COMMANDLINE INTERFACE MAY CHANGE IN FUTURE RELEASES.
Options
=======
The options below are convenience aliases to configurable class-options,
as listed in the "Equivalent to" description-line of the aliases.
To see all configurable class-options for some <cmd>, use:
<cmd> --help-all
--debug
set log level to logging.DEBUG (maximize logging output)
Equivalent to: [--Application.log_level=10]
--generate-config
generate default config file
Equivalent to: [--JupyterApp.generate_config=True]
-y
Answer yes to any questions instead of prompting.
Equivalent to: [--JupyterApp.answer_yes=True]
--execute
Execute the notebook prior to export.
Equivalent to: [--ExecutePreprocessor.enabled=True]
--allow-errors
Continue notebook execution even if one of the cells throws an error and include the error message in the cell output (the default behaviour is to abort conversion). This flag is only relevant if '--execute' was specified, too.
Equivalent to: [--ExecutePreprocessor.allow_errors=True]
--stdin
read a single notebook file from stdin. Write the resulting notebook with default basename 'notebook.*'
Equivalent to: [--NbConvertApp.from_stdin=True]
--stdout
Write notebook output to stdout instead of files.
Equivalent to: [--NbConvertApp.writer_class=StdoutWriter]
--inplace
Run nbconvert in place, overwriting the existing notebook (only
relevant when converting to notebook format)
Equivalent to: [--NbConvertApp.use_output_suffix=False --NbConvertApp.export_format=notebook --FilesWriter.build_directory=]
--clear-output
Clear output of current file and save in place,
overwriting the existing notebook.
Equivalent to: [--NbConvertApp.use_output_suffix=False --NbConvertApp.export_format=notebook --FilesWriter.build_directory= --ClearOutputPreprocessor.enabled=True]
--no-prompt
Exclude input and output prompts from converted document.
Equivalent to: [--TemplateExporter.exclude_input_prompt=True --TemplateExporter.exclude_output_prompt=True]
--no-input
Exclude input cells and output prompts from converted document.
This mode is ideal for generating code-free reports.
Equivalent to: [--TemplateExporter.exclude_output_prompt=True --TemplateExporter.exclude_input=True]
--allow-chromium-download
Whether to allow downloading chromium if no suitable version is found on the system.
Equivalent to: [--WebPDFExporter.allow_chromium_download=True]
--log-level=<Enum>
Set the log level by value or name.
Choices: any of [0, 10, 20, 30, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL']
Default: 30
Equivalent to: [--Application.log_level]
--config=<Unicode>
Full path of a config file.
Default: ''
Equivalent to: [--JupyterApp.config_file]
--to=<Unicode>
The export format to be used, either one of the built-in formats
['asciidoc', 'custom', 'html', 'latex', 'markdown', 'notebook', 'pdf',
'python', 'rst', 'script', 'slides', 'webpdf'] or a dotted object name that
represents the import path for an `Exporter` class
Default: ''
Equivalent to: [--NbConvertApp.export_format]
--template=<Unicode>
Name of the template to use
Default: ''
Equivalent to: [--TemplateExporter.template_name]
--template-file=<Unicode>
Name of the template file to use
Default: None
Equivalent to: [--TemplateExporter.template_file]
--writer=<DottedObjectName>
Writer class used to write the results of the conversion
Default: 'FilesWriter'
Equivalent to: [--NbConvertApp.writer_class]
--post=<DottedOrNone>
PostProcessor class used to write the results of the conversion
Default: ''
Equivalent to: [--NbConvertApp.postprocessor_class]
--output=<Unicode>
overwrite base name use for output files. can only be used when converting
one notebook at a time.
Default: ''
Equivalent to: [--NbConvertApp.output_base]
--output-dir=<Unicode>
Directory to write output(s) to. Defaults to output to the directory of each
notebook. To recover previous default behaviour (outputting to the current
working directory) use . as the flag value.
Default: ''
Equivalent to: [--FilesWriter.build_directory]
--reveal-prefix=<Unicode>
The URL prefix for reveal.js (version 3.x). This defaults to the reveal CDN,
but can be any url pointing to a copy of reveal.js.
For speaker notes to work, this must be a relative path to a local copy of
reveal.js: e.g., "reveal.js".
If a relative path is given, it must be a subdirectory of the current
directory (from which the server is run).
See the usage documentation
(https://nbconvert.readthedocs.io/en/latest/usage.html#reveal-js-html-
slideshow) for more details.
Default: ''
Equivalent to: [--SlidesExporter.reveal_url_prefix]
--nbformat=<Enum>
The nbformat version to write. Use this to downgrade notebooks.
Choices: any of [1, 2, 3, 4]
Default: 4
Equivalent to: [--NotebookExporter.nbformat_version]
Examples
--------
The simplest way to use nbconvert is
> jupyter nbconvert mynotebook.ipynb --to html
Options include ['asciidoc', 'custom', 'html', 'latex', 'markdown', 'notebook', 'pdf', 'python', 'rst', 'script', 'slides', 'webpdf'].
> jupyter nbconvert --to latex mynotebook.ipynb
Both HTML and LaTeX support multiple output templates. LaTeX includes
'base', 'article' and 'report'. HTML includes 'basic' and 'full'. You
can specify the flavor of the format used.
> jupyter nbconvert --to html --template lab mynotebook.ipynb
You can also pipe the output to stdout, rather than a file
> jupyter nbconvert mynotebook.ipynb --stdout
PDF is generated via latex
> jupyter nbconvert mynotebook.ipynb --to pdf
You can get (and serve) a Reveal.js-powered slideshow
> jupyter nbconvert myslides.ipynb --to slides --post serve
Multiple notebooks can be given at the command line in a couple of
different ways:
> jupyter nbconvert notebook*.ipynb
> jupyter nbconvert notebook1.ipynb notebook2.ipynb
or you can specify the notebooks list in a config file, containing::
c.NbConvertApp.notebooks = ["my_notebook.ipynb"]
> jupyter nbconvert --config mycfg.py
To see all available configurables, use `--help-all`.
[NbConvertApp] WARNING | pattern 'EL HALABI_KitDataScience_Projet Final.ipynb' matched no files